/* NoX (NoC Simulator)
 *
 * Dept. of Computer Science & Engineering, Pennsylvania State University.
 * All Rights Reserved.
 *  
 * 1. License     
 * NoX is distributed free of charge for academic, educational, noncommercial 
 * research purposes as long as this notice in its entirety is preserved in
 * every file included in this package.
 * All commercial use of this program requires separate licence. Contact the
 * author for details.
 * 
 * 2. All the publications that used the simulation results generated by the 
 * NoX should notify the author of the publication information and put 
 * following reference.
 *
 *  http://www.cse.psu.edu/~dpark/nox/
 * 
 * 3. Modification of the source code is permitted and encouraged as long as 
 * it follows the terms described in this copyright notice.
 *
 * 4. The author is not responsible for any problems caused by possible errors
 * of the NoX package. Therefore, users should verify the simulation result
 * before using it in their publication.
 *
 * Dept. of Computer Science & Engineering, Pennsylvania State University.
 * Contact: dpark@cse.psu.edu 
 * 
 * 6. If problems are found with the NoX package, please send an email to the
 * author for discussion and correction.

*/

/* Update History
 *
 * Jan. 31, 2006  Version 1.0 released by Dongkook Park 
 *
 */

/* LINK.C - link traversal stage */

#include <stdio.h>
#include <stdlib.h>
#include "main.h"
#include "router.h"
#include "nic.h"
#include "shared.h"
#include "batch.h"
#include "rank.h"


void link_stage()
{
  // This stage handles data transfer through the link.
  int node, pc, vc, next_node, next_pc;
  flit_t *flit_ptr;

  for(node=0; node<NUM_NODES; node++)
  {
    NUM_PC     = router_info[node].num_pc;
    NUM_NIC_PC = router_info[node].num_nic_pc;
    NUM_VC     = router_info[node].num_vc;
    for(pc=0; pc<NUM_PC-NUM_NIC_PC; pc++)
    {
      // Get the node number of the neighbor connected by the physical channel pc.
      next_node = neighbor[node][pc];
      next_pc = neighbor_pc[node][pc];
      for(vc=0; vc<NUM_VC; vc++)
      {
        if( (msg_cnt(&(link_buf[node][pc][vc]))) >= 1 )
        {
          recv_flit(&(link_buf[node][pc][vc]), &flit_ptr);
          if(link_info[node][pc] >= 1)
          {
            if(verbose == YES) 
            { 
              printf("Link stage 1 [%d][%d][%d]-flit:%d(%s) \n", 
                  node, pc, vc, flit_ptr->flit_num, (HEAD_FLIT)?
                  "HEAD":(TAIL_FLIT)?"TAIL":"MIDDLE"); 
              fflush(stdout); 
            }

            send_flit(&(link_buf2[node][pc][vc]), &flit_ptr);
            continue;
          }

          if(verbose == YES)
          {
            printf("[%d][%d][%d] rinbuf :%d vc_stat:%s\n",next_node,next_pc,vc,msg_cnt(&(router_input_buf[next_node][next_pc][vc])),
                vc_state[vc_info[next_node][next_pc][vc].vc_stat]);
            print_mbox(&(router_input_buf[next_node][next_pc][vc]));
          }

          if(sql == YES)
            fprintf(fsql, "INSERT INTO FLOW VALUES(0, %d,%d,%d,%d,%d,%d);\n", 
                flit_ptr->flit_num, node, pc, vc, 4, sim_clock);
          if(verbose == YES) 
          { 
            printf("Link    [%d][%d][%d]-flit:%d(%s) to rinbuf[%d][%d][%d]\n", 
                node, pc, vc, flit_ptr->flit_num, (HEAD_FLIT)?
                "HEAD":(TAIL_FLIT)?"TAIL":"MIDDLE", 
                next_node, next_pc, vc);
            fflush(stdout); 
          }

          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
          // Power
          if(router_info[node].type == LOCAL || router_info[next_node].type == LOCAL)
            p_link += 0.0;
          else
            p_link += PD_LINK;
          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/

          // Reset entry time of the flit.
          flit_ptr->entry_time = sim_clock;

          if(verbose == YES)
            printf("Setting entry time [%d][%d][%d]-flit:%d(%s) : %lld\n",node, pc, vc, 
                flit_ptr->flit_num, (HEAD_FLIT)? "HEAD": 
                (TAIL_FLIT)? "TAIL":"MIDDLE", (long long)sim_clock);

          //--------------------------------------------------------------------- 
          // Generate LINK soft-error randomly...
          if(flit_ptr->is_nack == NO) // Generate error only for non-NACK packet.
          {
            if(((float)rand()/(float)RAND_MAX < link_err_rate)? 1:0 ) 
            {
              flit_ptr->error |= ERROR_LINK;
              num_link_err++;
            }
          }
          //--------------------------------------------------------------------- 

          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
          // Power
          // Add router buffer write power.
          p_buffer += PD_BUFCTR + PD_BUFFER;
          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/

          // If any of soft error detection/recovery scheme is used, we need to 
          // double router input buffer since we use double-buffering scheme. 
          if(link_err_rate + routing_err_rate + swarbiter_err_rate > 0) 
            p_buffer += PD_BUFCTR + PD_BUFFER;



          // Check for the ejection flits.
          // Here, we assume that only one ejection channel is used. (pc=0)
          if( (early_ejection == YES) && (flit_ptr->data.dnode == next_node))
          {
            // If any of soft error detection/recovery scheme is used, 
            // restore router input buffer power added above since it's 
            // directly ejected without going to router input buffer.
            if(link_err_rate + routing_err_rate + swarbiter_err_rate > 0) 
              p_buffer -= PD_BUFCTR + PD_BUFFER;

            send_flit(&(nic_input_buf[next_node][0][vc]), &flit_ptr);
          }
          // If no early ejection or non-ejecting flits
          else
          {
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // start batching support
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            //Update stat counters for ranking
            if(ranking_type[next_node] == RANKING_LOCAL_SJF) 
              local_sjf_counter[next_node][flit_ptr->priority_id]++;

            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            //local batching book keeping for current node
            if(batching_mode[node] == BATCHING_LOCAL_FULLBATCHING)
              if(flit_ptr->batch_id == current_batch[node])
              {
                local_source_jobs_done[node]++;
                if(verbose == YES)
                  printf("Updating local_source_jobs_done:%d for flit:%d\n",local_source_jobs_done[node],flit_ptr->flit_num);
              }
              else
                if(verbose == YES)
                  printf("Non markerd flit:%d\n",flit_ptr->flit_num);
            //local batching book keeping for next node
            if(batching_mode[next_node] == BATCHING_LOCAL_FULLBATCHING)
            {
              if(local_batch_marking_counter[next_node] < marking_cap[next_node])
              {
                local_batch_marking_counter[next_node]++;
                flit_ptr->marking_weight = 2;
                flit_ptr->batch_id = current_batch[next_node];

                //if(next_node == 56)
                if(verbose == YES)
                {
                  if(local_batch_marking_counter[next_node] == marking_cap[next_node])
                    printf("Finished marking  batch[%d] :( to %lld  at %lld\n",next_node,current_batch[next_node],sim_clock);
                  else
                    printf("Marking flit:%d counter:%d\n",flit_ptr->flit_num,local_batch_marking_counter[next_node]);
                }

              }
              else
              {
                flit_ptr->marking_weight = 1;
                flit_ptr->batch_id = -1;
              }
            }
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            //local batching time based book keeping
            if(batching_mode[next_node] == BATCHING_LOCAL_TIMEBASED )
            {
              local_batch_marking_counter[next_node]++;
              flit_ptr->batch_id       = last_batch_id[next_node];
              flit_ptr->marking_weight = MAX_BATCH_ID - flit_ptr->batch_id + 1;
            }
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // end batching support
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/

            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // start hybrid topology
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            local_msg_len = flit_ptr->llen;
            global_msg_len  = flit_ptr->msglen;

            if(pc == 0 && router_info[node].type == LOCAL && (TAIL_FLIT) && flit_ptr->msgtype != CONTROL)
            {
              flit_t *body_flit_ptr, *tmp;
              if(HEAD_FLIT)
                flit_ptr->flit_type = HEAD;
              else
                flit_ptr->flit_type     = MIDL;

              if(local_msg_len == global_msg_len)
                flit_ptr->flit_type     = TAIL;

              tmp = flit_ptr;
              send_flit(&(router_input_buf[next_node][next_pc][vc]), &flit_ptr);
              for(int pos = local_msg_len; pos <global_msg_len; pos++)
              {
                body_flit_ptr  = flit_alloc();
                make_body_flit(body_flit_ptr,tmp); 
                if(pos == global_msg_len - 1)
                  body_flit_ptr->flit_type  = TAIL;
                tmp = body_flit_ptr;
                if(verbose == YES) 
                { printf("Inject local to global [%d][%d][%d]-flit:%d, dnode:%d dest:%d\n", next_node, next_pc, 
                    vc, body_flit_ptr->flit_num, body_flit_ptr->data.dnode, body_flit_ptr->data.dest); fflush(stdout);}

                    send_flit(&(router_input_buf[next_node][next_pc][vc]), &body_flit_ptr);
              }

              if(verbose == YES)
                print_mbox(&(router_input_buf[next_node][next_pc][vc]));
            }// end split flits case

            else if(router_info[next_node].type == LOCAL)
            {
              if(flit_ptr->pos < local_msg_len)
              {
                if(local_msg_len == 1)
                  flit_ptr->flit_type = (HEAD | TAIL);
                else if(flit_ptr->pos == local_msg_len-1)
                  flit_ptr->flit_type = TAIL;
                send_flit(&(router_input_buf[next_node][next_pc][vc]), &flit_ptr);
              }
              else
              {
                if(verbose == YES) 
                { printf("Squashing global to local [%d][%d][%d]-flit:%d(%s), dnode:%d dest:%d\n", next_node, next_pc, 
                    vc, flit_ptr->flit_num,"MT", flit_ptr->data.dnode, flit_ptr->data.dest); fflush(stdout);
                }
                flit_free(flit_ptr);
              }
            } // end squash flits case
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // end hybrid topology
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            else
              send_flit(&(router_input_buf[next_node][next_pc][vc]), &flit_ptr);
          }
          // Keep track of the flit activity.
          if(sim_clock > warmup_cycle)
            total_activity[node]++;
        }
      }// for vc
    }// for pc
  }// for node

}

void link_stage2()
{
  // This stage handles data transfer through the link.
  int node, pc, vc, next_node, next_pc;
  flit_t *flit_ptr;

  for(node=0; node<NUM_NODES; node++)
  {
    NUM_PC     = router_info[node].num_pc;
    NUM_NIC_PC = router_info[node].num_nic_pc;
    NUM_VC     = router_info[node].num_vc;
    for(pc=0; pc<NUM_PC-NUM_NIC_PC; pc++)
    {
      // Get the node number of the neighbor connected by the physical channel pc.
      next_node = neighbor[node][pc];
      next_pc = neighbor_pc[node][pc];
      for(vc=0; vc<NUM_VC; vc++)
      {
        if( (msg_cnt(&(link_buf2[node][pc][vc]))) >= 1 )
        {
          recv_flit(&(link_buf2[node][pc][vc]), &flit_ptr);
          if(link_info[node][pc] >= 2)
          {
            if(verbose == YES) 
            { 
              printf("Link stage 2 [%d][%d][%d]-flit:%d(%s) \n", 
                  node, pc, vc, flit_ptr->flit_num, (HEAD_FLIT)?
                  "HEAD":(TAIL_FLIT)?"TAIL":"MIDDLE"); 
              fflush(stdout); 
            }

            send_flit(&(link_buf3[node][pc][vc]), &flit_ptr);
            continue;
          }

          if(verbose == YES)
          {
            printf("[%d][%d][%d] rinbuf :%d vc_stat:%s\n",next_node,next_pc,vc,msg_cnt(&(router_input_buf[next_node][next_pc][vc])),
                vc_state[vc_info[next_node][next_pc][vc].vc_stat]);
            print_mbox(&(router_input_buf[next_node][next_pc][vc]));
          }

          if(sql == YES)
            fprintf(fsql, "INSERT INTO FLOW VALUES(0, %d,%d,%d,%d,%d,%d);\n", 
                flit_ptr->flit_num, node, pc, vc, 4, sim_clock);
          if(verbose == YES) 
          { 
            printf("Link    [%d][%d][%d]-flit:%d(%s) to rinbuf[%d][%d][%d]\n", 
                node, pc, vc, flit_ptr->flit_num, (HEAD_FLIT)?
                "HEAD":(TAIL_FLIT)?"TAIL":"MIDDLE", 
                next_node, next_pc, vc);
            fflush(stdout); 
          }

          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
          // Power
          if(router_info[node].type == LOCAL || router_info[next_node].type == LOCAL)
            p_link += 0.0;
          else
            p_link += PD_LINK;
          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/

          // Reset entry time of the flit.
          flit_ptr->entry_time = sim_clock;

          if(verbose == YES)
            printf("Setting entry time [%d][%d][%d]-flit:%d(%s) : %lld\n",node, pc, vc, 
                flit_ptr->flit_num, (HEAD_FLIT)? "HEAD": 
                (TAIL_FLIT)? "TAIL":"MIDDLE", (long long)sim_clock);

          //--------------------------------------------------------------------- 
          // Generate LINK soft-error randomly...
          if(flit_ptr->is_nack == NO) // Generate error only for non-NACK packet.
          {
            if(((float)rand()/(float)RAND_MAX < link_err_rate)? 1:0 ) 
            {
              flit_ptr->error |= ERROR_LINK;
              num_link_err++;
            }
          }
          //--------------------------------------------------------------------- 

          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
          // Power
          // Add router buffer write power.
          p_buffer += PD_BUFCTR + PD_BUFFER;
          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/

          // If any of soft error detection/recovery scheme is used, we need to 
          // double router input buffer since we use double-buffering scheme. 
          if(link_err_rate + routing_err_rate + swarbiter_err_rate > 0) 
            p_buffer += PD_BUFCTR + PD_BUFFER;



          // Check for the ejection flits.
          // Here, we assume that only one ejection channel is used. (pc=0)
          if( (early_ejection == YES) && (flit_ptr->data.dnode == next_node))
          {
            // If any of soft error detection/recovery scheme is used, 
            // restore router input buffer power added above since it's 
            // directly ejected without going to router input buffer.
            if(link_err_rate + routing_err_rate + swarbiter_err_rate > 0) 
              p_buffer -= PD_BUFCTR + PD_BUFFER;

            send_flit(&(nic_input_buf[next_node][0][vc]), &flit_ptr);
          }
          // If no early ejection or non-ejecting flits
          else
          {
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // start batching support
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            //Update stat counters for ranking
            if(ranking_type[next_node] == RANKING_LOCAL_SJF) 
              local_sjf_counter[next_node][flit_ptr->priority_id]++;

            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            //local batching book keeping for current node
            if(batching_mode[node] == BATCHING_LOCAL_FULLBATCHING)
              if(flit_ptr->batch_id == current_batch[node])
              {
                local_source_jobs_done[node]++;
                if(verbose == YES)
                  printf("Updating local_source_jobs_done:%d for flit:%d\n",local_source_jobs_done[node],flit_ptr->flit_num);
              }
              else
                if(verbose == YES)
                  printf("Non markerd flit:%d\n",flit_ptr->flit_num);
            //local batching book keeping for next node
            if(batching_mode[next_node] == BATCHING_LOCAL_FULLBATCHING)
            {
              if(local_batch_marking_counter[next_node] < marking_cap[next_node])
              {
                local_batch_marking_counter[next_node]++;
                flit_ptr->marking_weight = 2;
                flit_ptr->batch_id = current_batch[next_node];

                //if(next_node == 56)
                if(verbose == YES)
                {
                  if(local_batch_marking_counter[next_node] == marking_cap[next_node])
                    printf("Finished marking  batch[%d] :( to %lld  at %lld\n",next_node,current_batch[next_node],sim_clock);
                  else
                    printf("Marking flit:%d counter:%d\n",flit_ptr->flit_num,local_batch_marking_counter[next_node]);
                }

              }
              else
              {
                flit_ptr->marking_weight = 1;
                flit_ptr->batch_id = -1;
              }
            }
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            //local batching time based book keeping
            if(batching_mode[next_node] == BATCHING_LOCAL_TIMEBASED )
            {
              local_batch_marking_counter[next_node]++;
              flit_ptr->batch_id       = last_batch_id[next_node];
              flit_ptr->marking_weight = MAX_BATCH_ID - flit_ptr->batch_id + 1;
            }
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // end batching support
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/

            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // start hybrid topology
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            local_msg_len = flit_ptr->llen;
            global_msg_len  = flit_ptr->msglen;

            if(pc == 0 && router_info[node].type == LOCAL && (TAIL_FLIT) && flit_ptr->msgtype != CONTROL)
            {
              flit_t *body_flit_ptr, *tmp;
              if(HEAD_FLIT)
                flit_ptr->flit_type = HEAD;
              else
                flit_ptr->flit_type     = MIDL;

              if(local_msg_len == global_msg_len)
                flit_ptr->flit_type     = TAIL;

              tmp = flit_ptr;
              send_flit(&(router_input_buf[next_node][next_pc][vc]), &flit_ptr);
              for(int pos = local_msg_len; pos <global_msg_len; pos++)
              {
                body_flit_ptr  = flit_alloc();
                make_body_flit(body_flit_ptr,tmp); 
                if(pos == global_msg_len - 1)
                  body_flit_ptr->flit_type  = TAIL;
                tmp = body_flit_ptr;
                if(verbose == YES) 
                { printf("Inject local to global [%d][%d][%d]-flit:%d, dnode:%d dest:%d\n", next_node, next_pc, 
                    vc, body_flit_ptr->flit_num, body_flit_ptr->data.dnode, body_flit_ptr->data.dest); fflush(stdout);}

                    send_flit(&(router_input_buf[next_node][next_pc][vc]), &body_flit_ptr);
              }

              if(verbose == YES)
                print_mbox(&(router_input_buf[next_node][next_pc][vc]));
            }// end split flits case

            else if(router_info[next_node].type == LOCAL)
            {
              if(flit_ptr->pos < local_msg_len)
              {
                if(local_msg_len == 1)
                  flit_ptr->flit_type = (HEAD | TAIL);
                else if(flit_ptr->pos == local_msg_len-1)
                  flit_ptr->flit_type = TAIL;
                send_flit(&(router_input_buf[next_node][next_pc][vc]), &flit_ptr);
              }
              else
              {
                if(verbose == YES) 
                { printf("Squashing global to local [%d][%d][%d]-flit:%d(%s), dnode:%d dest:%d\n", next_node, next_pc, 
                    vc, flit_ptr->flit_num,"MT", flit_ptr->data.dnode, flit_ptr->data.dest); fflush(stdout);
                }
                flit_free(flit_ptr);
              }
            } // end squash flits case
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // end hybrid topology
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            else
              send_flit(&(router_input_buf[next_node][next_pc][vc]), &flit_ptr);
          }
          // Keep track of the flit activity.
          if(sim_clock > warmup_cycle)
            total_activity[node]++;
        }
      }// for vc
    }// for pc
  }// for node

}

void link_stage3()
{
  // This stage handles data transfer through the link.
  int node, pc, vc, next_node, next_pc;
  flit_t *flit_ptr;

  for(node=0; node<NUM_NODES; node++)
  {
    NUM_PC     = router_info[node].num_pc;
    NUM_NIC_PC = router_info[node].num_nic_pc;
    NUM_VC     = router_info[node].num_vc;
    for(pc=0; pc<NUM_PC-NUM_NIC_PC; pc++)
    {
      // Get the node number of the neighbor connected by the physical channel pc.
      next_node = neighbor[node][pc];
      next_pc = neighbor_pc[node][pc];
      for(vc=0; vc<NUM_VC; vc++)
      {
        if( (msg_cnt(&(link_buf3[node][pc][vc]))) >= 1 )
        {
          recv_flit(&(link_buf3[node][pc][vc]), &flit_ptr);
          if(link_info[node][pc] >= 3)
          {
            if(verbose == YES) 
            { 
              printf("Link stage 3 [%d][%d][%d]-flit:%d(%s) \n", 
                  node, pc, vc, flit_ptr->flit_num, (HEAD_FLIT)?
                  "HEAD":(TAIL_FLIT)?"TAIL":"MIDDLE"); 
              fflush(stdout); 
            }

            send_flit(&(link_buf4[node][pc][vc]), &flit_ptr);
            continue;
          }
          if(verbose == YES)
          {
            printf("[%d][%d][%d] rinbuf :%d vc_stat:%s\n",next_node,next_pc,vc,msg_cnt(&(router_input_buf[next_node][next_pc][vc])),
                vc_state[vc_info[next_node][next_pc][vc].vc_stat]);
            print_mbox(&(router_input_buf[next_node][next_pc][vc]));
          }

          if(sql == YES)
            fprintf(fsql, "INSERT INTO FLOW VALUES(0, %d,%d,%d,%d,%d,%d);\n", 
                flit_ptr->flit_num, node, pc, vc, 4, sim_clock);
          if(verbose == YES) 
          { 
            printf("Link    [%d][%d][%d]-flit:%d(%s) to rinbuf[%d][%d][%d]\n", 
                node, pc, vc, flit_ptr->flit_num, (HEAD_FLIT)?
                "HEAD":(TAIL_FLIT)?"TAIL":"MIDDLE", 
                next_node, next_pc, vc);
            fflush(stdout); 
          }

          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
          // Power
          if(router_info[node].type == LOCAL || router_info[next_node].type == LOCAL)
            p_link += 0.0;
          else
            p_link += PD_LINK;
          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/

          // Reset entry time of the flit.
          flit_ptr->entry_time = sim_clock;

          if(verbose == YES)
            printf("Setting entry time [%d][%d][%d]-flit:%d(%s) : %lld\n",node, pc, vc, 
                flit_ptr->flit_num, (HEAD_FLIT)? "HEAD": 
                (TAIL_FLIT)? "TAIL":"MIDDLE", (long long)sim_clock);

          //--------------------------------------------------------------------- 
          // Generate LINK soft-error randomly...
          if(flit_ptr->is_nack == NO) // Generate error only for non-NACK packet.
          {
            if(((float)rand()/(float)RAND_MAX < link_err_rate)? 1:0 ) 
            {
              flit_ptr->error |= ERROR_LINK;
              num_link_err++;
            }
          }
          //--------------------------------------------------------------------- 

          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
          // Power
          // Add router buffer write power.
          p_buffer += PD_BUFCTR + PD_BUFFER;
          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/

          // If any of soft error detection/recovery scheme is used, we need to 
          // double router input buffer since we use double-buffering scheme. 
          if(link_err_rate + routing_err_rate + swarbiter_err_rate > 0) 
            p_buffer += PD_BUFCTR + PD_BUFFER;



          // Check for the ejection flits.
          // Here, we assume that only one ejection channel is used. (pc=0)
          if( (early_ejection == YES) && (flit_ptr->data.dnode == next_node))
          {
            // If any of soft error detection/recovery scheme is used, 
            // restore router input buffer power added above since it's 
            // directly ejected without going to router input buffer.
            if(link_err_rate + routing_err_rate + swarbiter_err_rate > 0) 
              p_buffer -= PD_BUFCTR + PD_BUFFER;

            send_flit(&(nic_input_buf[next_node][0][vc]), &flit_ptr);
          }
          // If no early ejection or non-ejecting flits
          else
          {
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // start batching support
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            //Update stat counters for ranking
            if(ranking_type[next_node] == RANKING_LOCAL_SJF) 
              local_sjf_counter[next_node][flit_ptr->priority_id]++;

            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            //local batching book keeping for current node
            if(batching_mode[node] == BATCHING_LOCAL_FULLBATCHING)
              if(flit_ptr->batch_id == current_batch[node])
              {
                local_source_jobs_done[node]++;
                if(verbose == YES)
                  printf("Updating local_source_jobs_done:%d for flit:%d\n",local_source_jobs_done[node],flit_ptr->flit_num);
              }
              else
                if(verbose == YES)
                  printf("Non markerd flit:%d\n",flit_ptr->flit_num);
            //local batching book keeping for next node
            if(batching_mode[next_node] == BATCHING_LOCAL_FULLBATCHING)
            {
              if(local_batch_marking_counter[next_node] < marking_cap[next_node])
              {
                local_batch_marking_counter[next_node]++;
                flit_ptr->marking_weight = 2;
                flit_ptr->batch_id = current_batch[next_node];

                //if(next_node == 56)
                if(verbose == YES)
                {
                  if(local_batch_marking_counter[next_node] == marking_cap[next_node])
                    printf("Finished marking  batch[%d] :( to %lld  at %lld\n",next_node,current_batch[next_node],sim_clock);
                  else
                    printf("Marking flit:%d counter:%d\n",flit_ptr->flit_num,local_batch_marking_counter[next_node]);
                }

              }
              else
              {
                flit_ptr->marking_weight = 1;
                flit_ptr->batch_id = -1;
              }
            }
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            //local batching time based book keeping
            if(batching_mode[next_node] == BATCHING_LOCAL_TIMEBASED )
            {
              local_batch_marking_counter[next_node]++;
              flit_ptr->batch_id       = last_batch_id[next_node];
              flit_ptr->marking_weight = MAX_BATCH_ID - flit_ptr->batch_id + 1;
            }
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // end batching support
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/

            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // start hybrid topology
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            local_msg_len = flit_ptr->llen;
            global_msg_len  = flit_ptr->msglen;

            if(pc == 0 && router_info[node].type == LOCAL && (TAIL_FLIT) && flit_ptr->msgtype != CONTROL)
            {
              flit_t *body_flit_ptr, *tmp;
              if(HEAD_FLIT)
                flit_ptr->flit_type = HEAD;
              else
                flit_ptr->flit_type     = MIDL;

              if(local_msg_len == global_msg_len)
                flit_ptr->flit_type     = TAIL;

              tmp = flit_ptr;
              send_flit(&(router_input_buf[next_node][next_pc][vc]), &flit_ptr);
              for(int pos = local_msg_len; pos <global_msg_len; pos++)
              {
                body_flit_ptr  = flit_alloc();
                make_body_flit(body_flit_ptr,tmp); 
                if(pos == global_msg_len - 1)
                  body_flit_ptr->flit_type  = TAIL;
                tmp = body_flit_ptr;
                if(verbose == YES) 
                { printf("Inject local to global [%d][%d][%d]-flit:%d, dnode:%d dest:%d\n", next_node, next_pc, 
                    vc, body_flit_ptr->flit_num, body_flit_ptr->data.dnode, body_flit_ptr->data.dest); fflush(stdout);}

                    send_flit(&(router_input_buf[next_node][next_pc][vc]), &body_flit_ptr);
              }

              if(verbose == YES)
                print_mbox(&(router_input_buf[next_node][next_pc][vc]));
            }// end split flits case

            else if(router_info[next_node].type == LOCAL)
            {
              if(flit_ptr->pos < local_msg_len)
              {
                if(local_msg_len == 1)
                  flit_ptr->flit_type = (HEAD | TAIL);
                else if(flit_ptr->pos == local_msg_len-1)
                  flit_ptr->flit_type = TAIL;
                send_flit(&(router_input_buf[next_node][next_pc][vc]), &flit_ptr);
              }
              else
              {
                if(verbose == YES) 
                { printf("Squashing global to local [%d][%d][%d]-flit:%d(%s), dnode:%d dest:%d\n", next_node, next_pc, 
                    vc, flit_ptr->flit_num,"MT", flit_ptr->data.dnode, flit_ptr->data.dest); fflush(stdout);
                }
                flit_free(flit_ptr);
              }
            } // end squash flits case
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // end hybrid topology
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            else
              send_flit(&(router_input_buf[next_node][next_pc][vc]), &flit_ptr);
          }
          // Keep track of the flit activity.
          if(sim_clock > warmup_cycle)
            total_activity[node]++;
        }
      }// for vc
    }// for pc
  }// for node

}


void link_stage4()
{
  // This stage handles data transfer through the link.
  int node, pc, vc, next_node, next_pc;
  flit_t *flit_ptr;

  for(node=0; node<NUM_NODES; node++)
  {
    NUM_PC     = router_info[node].num_pc;
    NUM_NIC_PC = router_info[node].num_nic_pc;
    NUM_VC     = router_info[node].num_vc;
    for(pc=0; pc<NUM_PC-NUM_NIC_PC; pc++)
    {
      // Get the node number of the neighbor connected by the physical channel pc.
      next_node = neighbor[node][pc];
      next_pc = neighbor_pc[node][pc];
      for(vc=0; vc<NUM_VC; vc++)
      {
        if( (msg_cnt(&(link_buf4[node][pc][vc]))) >= 1 )
        {
          recv_flit(&(link_buf4[node][pc][vc]), &flit_ptr);

          if(verbose == YES)
          {
            printf("[%d][%d][%d] rinbuf :%d vc_stat:%s\n",next_node,next_pc,vc,msg_cnt(&(router_input_buf[next_node][next_pc][vc])),
                vc_state[vc_info[next_node][next_pc][vc].vc_stat]);
            print_mbox(&(router_input_buf[next_node][next_pc][vc]));
          }

          if(sql == YES)
            fprintf(fsql, "INSERT INTO FLOW VALUES(0, %d,%d,%d,%d,%d,%d);\n", 
                flit_ptr->flit_num, node, pc, vc, 4, sim_clock);
          if(verbose == YES) 
          { 
            printf("Link    [%d][%d][%d]-flit:%d(%s) to rinbuf[%d][%d][%d]\n", 
                node, pc, vc, flit_ptr->flit_num, (HEAD_FLIT)?
                "HEAD":(TAIL_FLIT)?"TAIL":"MIDDLE", 
                next_node, next_pc, vc);
            fflush(stdout); 
          }

          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
          // Power
          if(router_info[node].type == LOCAL || router_info[next_node].type == LOCAL)
            p_link += 0.0;
          else
            p_link += PD_LINK;
          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/

          // Reset entry time of the flit.
          flit_ptr->entry_time = sim_clock;

          if(verbose == YES)
            printf("Setting entry time [%d][%d][%d]-flit:%d(%s) : %lld\n",node, pc, vc, 
                flit_ptr->flit_num, (HEAD_FLIT)? "HEAD": 
                (TAIL_FLIT)? "TAIL":"MIDDLE", (long long)sim_clock);

          //--------------------------------------------------------------------- 
          // Generate LINK soft-error randomly...
          if(flit_ptr->is_nack == NO) // Generate error only for non-NACK packet.
          {
            if(((float)rand()/(float)RAND_MAX < link_err_rate)? 1:0 ) 
            {
              flit_ptr->error |= ERROR_LINK;
              num_link_err++;
            }
          }
          //--------------------------------------------------------------------- 

          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
          // Power
          // Add router buffer write power.
          p_buffer += PD_BUFCTR + PD_BUFFER;
          /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/

          // If any of soft error detection/recovery scheme is used, we need to 
          // double router input buffer since we use double-buffering scheme. 
          if(link_err_rate + routing_err_rate + swarbiter_err_rate > 0) 
            p_buffer += PD_BUFCTR + PD_BUFFER;



          // Check for the ejection flits.
          // Here, we assume that only one ejection channel is used. (pc=0)
          if( (early_ejection == YES) && (flit_ptr->data.dnode == next_node))
          {
            // If any of soft error detection/recovery scheme is used, 
            // restore router input buffer power added above since it's 
            // directly ejected without going to router input buffer.
            if(link_err_rate + routing_err_rate + swarbiter_err_rate > 0) 
              p_buffer -= PD_BUFCTR + PD_BUFFER;

            send_flit(&(nic_input_buf[next_node][0][vc]), &flit_ptr);
          }
          // If no early ejection or non-ejecting flits
          else
          {
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // start batching support
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            //Update stat counters for ranking
            if(ranking_type[next_node] == RANKING_LOCAL_SJF) 
              local_sjf_counter[next_node][flit_ptr->priority_id]++;

            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            //local batching book keeping for current node
            if(batching_mode[node] == BATCHING_LOCAL_FULLBATCHING)
              if(flit_ptr->batch_id == current_batch[node])
              {
                local_source_jobs_done[node]++;
                if(verbose == YES)
                  printf("Updating local_source_jobs_done:%d for flit:%d\n",local_source_jobs_done[node],flit_ptr->flit_num);
              }
              else
                if(verbose == YES)
                  printf("Non markerd flit:%d\n",flit_ptr->flit_num);
            //local batching book keeping for next node
            if(batching_mode[next_node] == BATCHING_LOCAL_FULLBATCHING)
            {
              if(local_batch_marking_counter[next_node] < marking_cap[next_node])
              {
                local_batch_marking_counter[next_node]++;
                flit_ptr->marking_weight = 2;
                flit_ptr->batch_id = current_batch[next_node];

                //if(next_node == 56)
                if(verbose == YES)
                {
                  if(local_batch_marking_counter[next_node] == marking_cap[next_node])
                    printf("Finished marking  batch[%d] :( to %lld  at %lld\n",next_node,current_batch[next_node],sim_clock);
                  else
                    printf("Marking flit:%d counter:%d\n",flit_ptr->flit_num,local_batch_marking_counter[next_node]);
                }

              }
              else
              {
                flit_ptr->marking_weight = 1;
                flit_ptr->batch_id = -1;
              }
            }
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            //local batching time based book keeping
            if(batching_mode[next_node] == BATCHING_LOCAL_TIMEBASED )
            {
              local_batch_marking_counter[next_node]++;
              flit_ptr->batch_id       = last_batch_id[next_node];
              flit_ptr->marking_weight = MAX_BATCH_ID - flit_ptr->batch_id + 1;
            }
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // end batching support
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/

            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // start hybrid topology
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            local_msg_len = flit_ptr->llen;
            global_msg_len  = flit_ptr->msglen;

            if(pc == 0 && router_info[node].type == LOCAL && (TAIL_FLIT) && flit_ptr->msgtype != CONTROL)
            {
              flit_t *body_flit_ptr, *tmp;
              if(HEAD_FLIT)
                flit_ptr->flit_type = HEAD;
              else
                flit_ptr->flit_type     = MIDL;

              if(local_msg_len == global_msg_len)
                flit_ptr->flit_type     = TAIL;

              tmp = flit_ptr;
              send_flit(&(router_input_buf[next_node][next_pc][vc]), &flit_ptr);
              for(int pos = local_msg_len; pos <global_msg_len; pos++)
              {
                body_flit_ptr  = flit_alloc();
                make_body_flit(body_flit_ptr,tmp); 
                if(pos == global_msg_len - 1)
                  body_flit_ptr->flit_type  = TAIL;
                tmp = body_flit_ptr;
                if(verbose == YES) 
                { printf("Inject local to global [%d][%d][%d]-flit:%d, dnode:%d dest:%d\n", next_node, next_pc, 
                    vc, body_flit_ptr->flit_num, body_flit_ptr->data.dnode, body_flit_ptr->data.dest); fflush(stdout);}

                    send_flit(&(router_input_buf[next_node][next_pc][vc]), &body_flit_ptr);
              }

              if(verbose == YES)
                print_mbox(&(router_input_buf[next_node][next_pc][vc]));
            }// end split flits case

            else if(router_info[next_node].type == LOCAL)
            {
              if(flit_ptr->pos < local_msg_len)
              {
                if(local_msg_len == 1)
                  flit_ptr->flit_type = (HEAD | TAIL);
                else if(flit_ptr->pos == local_msg_len-1)
                  flit_ptr->flit_type = TAIL;
                send_flit(&(router_input_buf[next_node][next_pc][vc]), &flit_ptr);
              }
              else
              {
                if(verbose == YES) 
                { printf("Squashing global to local [%d][%d][%d]-flit:%d(%s), dnode:%d dest:%d\n", next_node, next_pc, 
                    vc, flit_ptr->flit_num,"MT", flit_ptr->data.dnode, flit_ptr->data.dest); fflush(stdout);
                }
                flit_free(flit_ptr);
              }
            } // end squash flits case
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            // end hybrid topology
            /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
            else
              send_flit(&(router_input_buf[next_node][next_pc][vc]), &flit_ptr);
          }
          // Keep track of the flit activity.
          if(sim_clock > warmup_cycle)
            total_activity[node]++;
        }
      }// for vc
    }// for pc
  }// for node

}
